﻿#coding:utf-8
import os
import urllib2
import html_outputer
import html_parser
import html_downloader
from bs4 import BeautifulSoup
import sys
reload(sys)
sys.setdefaultencoding("utf-8")

# dir = 'temp/'

# if not os.path.exists(dir):
	# os.mkdir(dir)
# line = 'http://www.wandoujia.com/apps/cn.jsx.tuokouxiu'

# parser = html_parser.HtmlParser()
# downloader = html_downloader.HtmlDownloader()
# db_outputer = html_outputer.DbOutputer()

# db_outputer.load()
# d = parser.app_detail_msg(downloader.download(line))
# app_pkg = line[30:]
# d['app_category'] = 'music'
# d['app_pkg'] = app_pkg
# d['app_url'] = line
# db_outputer.output(d)
# print '---output app msg over---'

# db_outputer.close()


url = 'http://www.wandoujia.com/apps/com.tencent.mm'

headers = {
	'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
	'Referer': 'http://www.wandoujia.com/app'
}
request = urllib2.Request(url, headers = headers)
response = urllib2.urlopen(request)
content = response.read().decode('utf-8')

soup = BeautifulSoup(content)

d = dict()
d['app_name'] = soup.find('span',class_='title').get_text()
d['app_size'] = soup.find('meta',itemprop='fileSize')['content']
d['category_list'] = soup.find('dd',class_='tag-box').get_text().strip()
d['app_version'] = soup.find_all('dd')[3].get_text()
d['app_system'] = soup.find('dd',class_='perms').next_element.strip()
d['app_source'] = soup.find_all('dd')[5].get_text().strip()
d['app_time'] = soup.find('time', itemprop='datePublished').get_text()


print d


























